{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convolution as 'running scalar product'\n",
    "\n",
    "Given the assumption of locality, stationarity, and compositionality, we can reduce the amount of computation for a matrix-vector multiplication by using a sparse (because local) Toeplitz/diagonal-constant (because stationary) scheme.\n",
    "In this way we simply end up re-discovering the convolution operator.\n",
    "\n",
    "We also recall that a scalar product is simply a un-normalised cosine distance, which tells us the *alignment* of two vectors.\n",
    "More specifically, we compute the magnitude of the orthogonal projection of one vector onto the other, or *vice versa*."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's use a library we've included for reading audio:\n",
    "import librosa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# My input signal x == x[k], sampling of the real, time continuous, x(t)\n",
    "x, sampling_rate = librosa.load('./res/win_xp_shutdown.wav')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute the temporal length of x(t)\n",
    "T = x.size / sampling_rate\n",
    "print(\n",
    "    f'x[k] has {x.size} samples',\n",
    "    f'the sampling rate is {sampling_rate * 1e-3}kHz',\n",
    "    f'x(t) is {T:.1f}s long'\n",
    "    , sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading interactive visualisation...\n",
    "from res.plot_lib import set_default\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ... in 2 cells \n",
    "set_default(figsize=(16, 8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create x(t) time span\n",
    "dt = 1 / sampling_rate\n",
    "t = np.r_[0:T:dt]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Try zooming in, in the following chart, and see how the waveform looks like from very close!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualise x(t)\n",
    "plt.figure()\n",
    "plt.plot(t, x)\n",
    "plt.xlabel('time [s]')\n",
    "plt.ylabel('amplitude [/]')\n",
    "plt.title(r'$x(t)$', size=20);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import Jupyter utilities for playing audio\n",
    "from IPython.display import display, Audio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's listen to x(t)\n",
    "Audio(x, rate=sampling_rate)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![score](res/score.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute Short-time Fourier transform (STFT) and convert the amplitude to dB\n",
    "X = librosa.stft(x)\n",
    "X_dB = librosa.amplitude_to_db(np.abs(X))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the spectroscope function...\n",
    "from librosa.display import specshow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ... and plot\n",
    "plt.figure()\n",
    "plt.subplot(2,1,1)\n",
    "plt.plot(t, x)\n",
    "plt.xlim([0, T])\n",
    "plt.ylabel('amplitude [/]')\n",
    "plt.title('Audio signal x(t) and its spectrogram X(t)')\n",
    "plt.setp(plt.gca().get_xticklabels(), visible=False)\n",
    "plt.subplot(2,1,2)\n",
    "specshow(X_dB, sr=sampling_rate, x_axis='time', y_axis='hz')\n",
    "plt.xlabel('time [s]')\n",
    "plt.ylabel('frequency [Hz]')\n",
    "plt.ylim(top=2000)\n",
    "plt.grid(True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Manual recontruction of the melody:\n",
    "# pick the melody frequencies/notes from the spectrogram above\n",
    "Ab6 = 1661  # Hz\n",
    "Eb6 = 1244  # Hz\n",
    "Ab5 = 830   # Hz\n",
    "Bb5 = 932   # Hz\n",
    "TT = .4  # s\n",
    "tt = np.r_[0:TT:dt]\n",
    "\n",
    "# generate tones\n",
    "A = {\n",
    "    'a^(1)': np.sin(2 * np.pi * Ab6 * tt),\n",
    "    'a^(2)': np.sin(2 * np.pi * Eb6 * tt),\n",
    "    'a^(3)': np.sin(2 * np.pi * Ab5 * tt),\n",
    "    'a^(4)': np.sin(2 * np.pi * Bb5 * tt),\n",
    "}\n",
    "\n",
    "# and concatenate them\n",
    "xx = np.concatenate([a[1] for a in A.items()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's listen to the original and the reconstructed\n",
    "display(Audio(x, rate=sampling_rate))\n",
    "display(Audio(xx, rate=sampling_rate))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's compute the STFT of the reconstruction\n",
    "XX = librosa.stft(xx)\n",
    "XX_dB = librosa.amplitude_to_db(np.abs(XX))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ... and plot both X(t) and XX(t)\n",
    "plt.figure()\n",
    "t_string = r'$0\\mathrm{s} \\leq t \\leq 1.6\\mathrm{s}$'\n",
    "\n",
    "plt.subplot(1,2,1)\n",
    "specshow(X_dB, sr=sampling_rate, x_axis='time', y_axis='hz')\n",
    "plt.ylim(ymax=2000)\n",
    "plt.ylabel('frequency [Hz]')\n",
    "plt.xlabel('time [s]')\n",
    "plt.grid(True)\n",
    "plt.xlim(right=1.6)\n",
    "plt.title(r'$X(t),\\; $' + t_string, size=20)\n",
    "\n",
    "plt.subplot(1,2,2)\n",
    "specshow(XX_dB, sr=sampling_rate, x_axis='time', y_axis='hz')\n",
    "plt.setp(plt.gca().get_yticklabels(), visible=False)\n",
    "plt.xlabel('time [s]')\n",
    "plt.ylim(top=2000)\n",
    "plt.ylabel('')\n",
    "plt.grid(True)\n",
    "plt.title(r'$\\hat X(t),\\; $' + t_string, size=20);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# What about in the time domain? Let's plot x(t) and a^(i)(t)\n",
    "plt.figure()\n",
    "plt.subplot(5, 1, 1)\n",
    "plt.title('x(t), first melody\\'s note')\n",
    "plt.plot(x, 'C1')\n",
    "plt.xlim([500, 600])\n",
    "plt.ylim([-.2, .2])\n",
    "i = 2\n",
    "for a in A.items():\n",
    "    plt.subplot(5, 1, i)\n",
    "    i += 1\n",
    "    plt.plot(a[1])\n",
    "    plt.xlim([0, 100])\n",
    "    plt.title(a[0], verticalalignment='top', backgroundcolor='black')\n",
    "    if i < 6: \n",
    "        plt.setp(plt.gca().get_xticklabels(), visible=False)\n",
    "\n",
    "plt.ylabel('amplitude [/]')\n",
    "plt.xlabel('samples [/]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Finally, let's convolve each tune a^(i) with x(t)\n",
    "plt.figure()\n",
    "convs = list()\n",
    "n = 1\n",
    "for a in A.items():\n",
    "    plt.subplot(4, 1, n)\n",
    "    plt.title(rf'$x(t) \\star a^{{({n})}}(t)$', backgroundcolor='black', verticalalignment='top', size=17)\n",
    "    n += 1\n",
    "    convs.append(np.convolve(x, a[1], mode='same'))\n",
    "    plt.plot(t, convs[-1])\n",
    "    if n < 5: \n",
    "        plt.setp(plt.gca().get_xticklabels(), visible=False)\n",
    "plt.ylabel('amplitude [/]')\n",
    "plt.xlabel('time [s]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Finally, let's listen to these convolutions!\n",
    "for c in convs:\n",
    "    display(Audio(c, rate=sampling_rate))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:dl-minicourse] *",
   "language": "python",
   "name": "conda-env-dl-minicourse-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
